Take-home_Ex02a1

Libraries

# Load necessary libraries
library(sf)
Warning: package 'sf' was built under R version 4.2.3
Linking to GEOS 3.11.0, GDAL 3.5.3, PROJ 9.1.0; sf_use_s2() is TRUE
library(ggplot2)
Warning: package 'ggplot2' was built under R version 4.2.3
library(dplyr)
Warning: package 'dplyr' was built under R version 4.2.3

Attaching package: 'dplyr'
The following objects are masked from 'package:stats':

    filter, lag
The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union
library(spdep)
Warning: package 'spdep' was built under R version 4.2.3
Loading required package: spData
To access larger datasets in this package, install the spDataLarge
package with: `install.packages('spDataLarge',
repos='https://nowosad.github.io/drat/', type='source')`
library(tmap)
Breaking News: tmap 3.x is retiring. Please test v4, e.g. with
remotes::install_github('r-tmap/tmap')
library(leaflet)
Warning: package 'leaflet' was built under R version 4.2.3
library(reshape2)
library(stats)
library(cluster)
library(tidyr)
Warning: package 'tidyr' was built under R version 4.2.3

Attaching package: 'tidyr'
The following object is masked from 'package:reshape2':

    smiths
library(rmapshaper)

Data Loading

We load the shapefile for Thailand’s administrative boundaries and the tourism dataset.

shapefile_path <- '/Users/sharon/OneDrive - Singapore Management University/ISSS626 Data/Take_home_Ex/Take_home_Ex02/data/tha_adm_rtsd_itos_20210121_shp/tha_admbnda_adm1_rtsd_20220121.shp'
gdf1 <- st_read(shapefile_path)
Reading layer `tha_admbnda_adm1_rtsd_20220121' from data source 
  `/Users/sharon/Library/CloudStorage/OneDrive-SingaporeManagementUniversity/ISSS626 Data/Take_home_Ex/Take_home_Ex02/data/tha_adm_rtsd_itos_20210121_shp/tha_admbnda_adm1_rtsd_20220121.shp' 
  using driver `ESRI Shapefile'
Simple feature collection with 77 features and 16 fields
Geometry type: MULTIPOLYGON
Dimension:     XY
Bounding box:  xmin: 97.34336 ymin: 5.613038 xmax: 105.637 ymax: 20.46507
Geodetic CRS:  WGS 84
head(gdf1)
Simple feature collection with 6 features and 16 fields
Geometry type: MULTIPOLYGON
Dimension:     XY
Bounding box:  xmin: 100.1913 ymin: 13.47842 xmax: 100.9639 ymax: 14.80246
Geodetic CRS:  WGS 84
  Shape_Leng Shape_Area                  ADM1_EN       ADM1_TH ADM1_PCODE
1   2.417227 0.13133873                  Bangkok  กรุงเทพมหานคร       TH10
2   1.695100 0.07926199             Samut Prakan    สมุทรปราการ       TH11
3   1.251111 0.05323766               Nonthaburi         นนทบุรี       TH12
4   1.884945 0.12698345             Pathum Thani        ปทุมธานี       TH13
5   3.041716 0.21393797 Phra Nakhon Si Ayutthaya พระนครศรีอยุธยา       TH14
6   1.739908 0.07920961                Ang Thong        อ่างทอง       TH15
  ADM1_REF ADM1ALT1EN ADM1ALT2EN ADM1ALT1TH ADM1ALT2TH  ADM0_EN   ADM0_TH
1     <NA>       <NA>       <NA>       <NA>       <NA> Thailand ประเทศไทย
2     <NA>       <NA>       <NA>       <NA>       <NA> Thailand ประเทศไทย
3     <NA>       <NA>       <NA>       <NA>       <NA> Thailand ประเทศไทย
4     <NA>       <NA>       <NA>       <NA>       <NA> Thailand ประเทศไทย
5     <NA>       <NA>       <NA>       <NA>       <NA> Thailand ประเทศไทย
6     <NA>       <NA>       <NA>       <NA>       <NA> Thailand ประเทศไทย
  ADM0_PCODE       date    validOn validTo                       geometry
1         TH 2019-02-18 2022-01-22    <NA> MULTIPOLYGON (((100.6139 13...
2         TH 2019-02-18 2022-01-22    <NA> MULTIPOLYGON (((100.7306 13...
3         TH 2019-02-18 2022-01-22    <NA> MULTIPOLYGON (((100.3415 14...
4         TH 2019-02-18 2022-01-22    <NA> MULTIPOLYGON (((100.8916 14...
5         TH 2019-02-18 2022-01-22    <NA> MULTIPOLYGON (((100.5131 14...
6         TH 2019-02-18 2022-01-22    <NA> MULTIPOLYGON (((100.3332 14...

Load tourism dataset

tourism_data_path <- '/Users/sharon/OneDrive - Singapore Management University/ISSS626 Data/Take_home_Ex/Take_home_Ex02/data/thailand_domestic_tourism_2019_2023_ver2.csv'
tourism_df <- read.csv(tourism_data_path)
head(tourism_df)
        date province_thai              province_eng region_thai region_eng
1 2019-01-01  กรุงเทพมหานคร                   Bangkok     ภาคกลาง    central
2 2019-01-01          ลพบุรี                  Lopburi      ภาคกลาง    central
3 2019-01-01 พระนครศรีอยุธยา Phra Nakhon Si Ayutthaya      ภาคกลาง    central
4 2019-01-01         สระบุรี                 Saraburi      ภาคกลาง    central
5 2019-01-01         ชัยนาท                  Chainat      ภาคกลาง    central
6 2019-01-01        นครปฐม            Nakhon Pathom      ภาคกลาง    central
            variable value
1 ratio_tourist_stay 93.37
2 ratio_tourist_stay 61.32
3 ratio_tourist_stay 73.37
4 ratio_tourist_stay 67.33
5 ratio_tourist_stay 79.31
6 ratio_tourist_stay 71.70

Data Preparation

We will merge the tourism data with the province-level shapefile using the province names in English.

The current dataset uses the first day of each month to represent the date, so we will extract the year and month to perform the analysis.

# Convert the 'date' column to Date format if it's not already
tourism_df$date <- as.Date(tourism_df$date, format = "%Y-%m-%d")

# Extract year and month from the date
tourism_df$year <- format(tourism_df$date, "%Y")
tourism_df$month <- format(tourism_df$date, "%m")

# Merge the datasets on province name
merged_data <- merge(gdf1, tourism_df, by.x = "ADM1_EN", by.y = "province_eng")

# Display the first few rows of the merged data
head(merged_data)
Simple feature collection with 6 features and 24 fields
Geometry type: MULTIPOLYGON
Dimension:     XY
Bounding box:  xmin: 100.3279 ymin: 13.49339 xmax: 100.9385 ymax: 13.9552
Geodetic CRS:  WGS 84
  ADM1_EN Shape_Leng Shape_Area      ADM1_TH ADM1_PCODE ADM1_REF ADM1ALT1EN
1 Bangkok   2.417227  0.1313387 กรุงเทพมหานคร       TH10     <NA>       <NA>
2 Bangkok   2.417227  0.1313387 กรุงเทพมหานคร       TH10     <NA>       <NA>
3 Bangkok   2.417227  0.1313387 กรุงเทพมหานคร       TH10     <NA>       <NA>
4 Bangkok   2.417227  0.1313387 กรุงเทพมหานคร       TH10     <NA>       <NA>
5 Bangkok   2.417227  0.1313387 กรุงเทพมหานคร       TH10     <NA>       <NA>
6 Bangkok   2.417227  0.1313387 กรุงเทพมหานคร       TH10     <NA>       <NA>
  ADM1ALT2EN ADM1ALT1TH ADM1ALT2TH  ADM0_EN   ADM0_TH ADM0_PCODE     date.x
1       <NA>       <NA>       <NA> Thailand ประเทศไทย         TH 2019-02-18
2       <NA>       <NA>       <NA> Thailand ประเทศไทย         TH 2019-02-18
3       <NA>       <NA>       <NA> Thailand ประเทศไทย         TH 2019-02-18
4       <NA>       <NA>       <NA> Thailand ประเทศไทย         TH 2019-02-18
5       <NA>       <NA>       <NA> Thailand ประเทศไทย         TH 2019-02-18
6       <NA>       <NA>       <NA> Thailand ประเทศไทย         TH 2019-02-18
     validOn validTo     date.y province_thai region_thai region_eng
1 2022-01-22    <NA> 2020-02-01  กรุงเทพมหานคร     ภาคกลาง    central
2 2022-01-22    <NA> 2021-03-01  กรุงเทพมหานคร     ภาคกลาง    central
3 2022-01-22    <NA> 2022-04-01  กรุงเทพมหานคร     ภาคกลาง    central
4 2022-01-22    <NA> 2022-02-01  กรุงเทพมหานคร     ภาคกลาง    central
5 2022-01-22    <NA> 2019-09-01  กรุงเทพมหานคร     ภาคกลาง    central
6 2022-01-22    <NA> 2021-11-01  กรุงเทพมหานคร     ภาคกลาง    central
            variable        value year month                       geometry
1 no_tourist_foreign 1.266425e+06 2020    02 MULTIPOLYGON (((100.6139 13...
2     no_tourist_all 1.985954e+06 2021    03 MULTIPOLYGON (((100.6139 13...
3    revenue_foreign 4.543890e+09 2022    04 MULTIPOLYGON (((100.6139 13...
4     no_tourist_all 2.501208e+06 2022    02 MULTIPOLYGON (((100.6139 13...
5 ratio_tourist_stay 7.832000e+01 2019    09 MULTIPOLYGON (((100.6139 13...
6 no_tourist_foreign 7.921700e+04 2021    11 MULTIPOLYGON (((100.6139 13...

Exploratory Data Analysis

We will visualize the distribution of tourist arrivals across provinces using an interactive map.

merged_data_simplified <- ms_simplify(merged_data, keep = 0.05) 

# Create a choropleth map to visualize tourist arrivals across provinces
tmap_mode("view") # Interactive map mode
tmap mode set to interactive viewing
tm_shape(merged_data_simplified) +
  tm_polygons("value", palette = "OrRd", title = "Tourist Arrivals (2019-2023)") +
  tm_layout(title = "Tourist Arrivals by Province in Thailand")
# Create an interactive map using Leaflet
leaflet(merged_data_simplified) %>%
  addTiles() %>%
  addPolygons(fillColor = ~colorNumeric("viridis", merged_data$value)(value),
              color = "#BDBDC3",
              fillOpacity = 0.7,
              weight = 1) %>%
  addLegend(pal = colorNumeric("viridis", merged_data$value),
            values = merged_data$value,
            title = "Tourist Arrivals")